package storage

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"io/ioutil"
	"os"
	"path"
	"path/filepath"
	"reflect"
	"sort"
	"strings"
	"time"

	drivers "github.com/containers/storage/drivers"
	"github.com/containers/storage/pkg/archive"
	"github.com/containers/storage/pkg/idtools"
	"github.com/containers/storage/pkg/ioutils"
	"github.com/containers/storage/pkg/mount"
	"github.com/containers/storage/pkg/stringid"
	"github.com/containers/storage/pkg/system"
	"github.com/containers/storage/pkg/tarlog"
	"github.com/containers/storage/pkg/truncindex"
	"github.com/klauspost/pgzip"
	digest "github.com/opencontainers/go-digest"
	"github.com/opencontainers/selinux/go-selinux/label"
	"github.com/pkg/errors"
	"github.com/vbatts/tar-split/archive/tar"
	"github.com/vbatts/tar-split/tar/asm"
	"github.com/vbatts/tar-split/tar/storage"
)

const (
	tarSplitSuffix = ".tar-split.gz"
	incompleteFlag = "incomplete"
)

// A Layer is a record of a copy-on-write layer that's stored by the lower
// level graph driver.
type Layer struct {
	// ID is either one which was specified at create-time, or a random
	// value which was generated by the library.
	ID string `json:"id"`

	// Names is an optional set of user-defined convenience values.  The
	// layer can be referred to by its ID or any of its names.  Names are
	// unique among layers.
	Names []string `json:"names,omitempty"`

	// Parent is the ID of a layer from which this layer inherits data.
	Parent string `json:"parent,omitempty"`

	// Metadata is data we keep for the convenience of the caller.  It is not
	// expected to be large, since it is kept in memory.
	Metadata string `json:"metadata,omitempty"`

	// MountLabel is an SELinux label which should be used when attempting to mount
	// the layer.
	MountLabel string `json:"mountlabel,omitempty"`

	// MountPoint is the path where the layer is mounted, or where it was most
	// recently mounted.  This can change between subsequent Unmount() and
	// Mount() calls, so the caller should consult this value after Mount()
	// succeeds to find the location of the container's root filesystem.
	MountPoint string `json:"-"`

	// MountCount is used as a reference count for the container's layer being
	// mounted at the mount point.
	MountCount int `json:"-"`

	// Created is the datestamp for when this layer was created.  Older
	// versions of the library did not track this information, so callers
	// will likely want to use the IsZero() method to verify that a value
	// is set before using it.
	Created time.Time `json:"created,omitempty"`

	// CompressedDigest is the digest of the blob that was last passed to
	// ApplyDiff() or Put(), as it was presented to us.
	CompressedDigest digest.Digest `json:"compressed-diff-digest,omitempty"`

	// CompressedSize is the length of the blob that was last passed to
	// ApplyDiff() or Put(), as it was presented to us.  If
	// CompressedDigest is not set, this should be treated as if it were an
	// uninitialized value.
	CompressedSize int64 `json:"compressed-size,omitempty"`

	// UncompressedDigest is the digest of the blob that was last passed to
	// ApplyDiff() or Put(), after we decompressed it.  Often referred to
	// as a DiffID.
	UncompressedDigest digest.Digest `json:"diff-digest,omitempty"`

	// UncompressedSize is the length of the blob that was last passed to
	// ApplyDiff() or Put(), after we decompressed it.  If
	// UncompressedDigest is not set, this should be treated as if it were
	// an uninitialized value.
	UncompressedSize int64 `json:"diff-size,omitempty"`

	// CompressionType is the type of compression which we detected on the blob
	// that was last passed to ApplyDiff() or Put().
	CompressionType archive.Compression `json:"compression,omitempty"`

	// UIDs and GIDs are lists of UIDs and GIDs used in the layer.  This
	// field is only populated (i.e., will only contain one or more
	// entries) if the layer was created using ApplyDiff() or Put().
	UIDs []uint32 `json:"uidset,omitempty"`
	GIDs []uint32 `json:"gidset,omitempty"`

	// Flags is arbitrary data about the layer.
	Flags map[string]interface{} `json:"flags,omitempty"`

	// UIDMap and GIDMap are used for setting up a layer's contents
	// for use inside of a user namespace where UID mapping is being used.
	UIDMap []idtools.IDMap `json:"uidmap,omitempty"`
	GIDMap []idtools.IDMap `json:"gidmap,omitempty"`

	// ReadOnly is true if this layer resides in a read-only layer store.
	ReadOnly bool `json:"-"`
}

type layerMountPoint struct {
	ID         string `json:"id"`
	MountPoint string `json:"path"`
	MountCount int    `json:"count"`
}

// DiffOptions override the default behavior of Diff() methods.
type DiffOptions struct {
	// Compression, if set overrides the default compressor when generating a diff.
	Compression *archive.Compression
}

// ROLayerStore wraps a graph driver, adding the ability to refer to layers by
// name, and keeping track of parent-child relationships, along with a list of
// all known layers.
type ROLayerStore interface {
	ROFileBasedStore
	ROMetadataStore

	// Exists checks if a layer with the specified name or ID is known.
	Exists(id string) bool

	// Get retrieves information about a layer given an ID or name.
	Get(id string) (*Layer, error)

	// Status returns an slice of key-value pairs, suitable for human consumption,
	// relaying whatever status information the underlying driver can share.
	Status() ([][2]string, error)

	// Changes returns a slice of Change structures, which contain a pathname
	// (Path) and a description of what sort of change (Kind) was made by the
	// layer (either ChangeModify, ChangeAdd, or ChangeDelete), relative to a
	// specified layer.  By default, the layer's parent is used as a reference.
	Changes(from, to string) ([]archive.Change, error)

	// Diff produces a tarstream which can be applied to a layer with the contents
	// of the first layer to produce a layer with the contents of the second layer.
	// By default, the parent of the second layer is used as the first
	// layer, so it need not be specified.  Options can be used to override
	// default behavior, but are also not required.
	Diff(from, to string, options *DiffOptions) (io.ReadCloser, error)

	// DiffSize produces an estimate of the length of the tarstream which would be
	// produced by Diff.
	DiffSize(from, to string) (int64, error)

	// Size produces a cached value for the uncompressed size of the layer,
	// if one is known, or -1 if it is not known.  If the layer can not be
	// found, it returns an error.
	Size(name string) (int64, error)

	// Lookup attempts to translate a name to an ID.  Most methods do this
	// implicitly.
	Lookup(name string) (string, error)

	// LayersByCompressedDigest returns a slice of the layers with the
	// specified compressed digest value recorded for them.
	LayersByCompressedDigest(d digest.Digest) ([]Layer, error)

	// LayersByUncompressedDigest returns a slice of the layers with the
	// specified uncompressed digest value recorded for them.
	LayersByUncompressedDigest(d digest.Digest) ([]Layer, error)

	// Layers returns a slice of the known layers.
	Layers() ([]Layer, error)
}

// LayerStore wraps a graph driver, adding the ability to refer to layers by
// name, and keeping track of parent-child relationships, along with a list of
// all known layers.
type LayerStore interface {
	ROLayerStore
	RWFileBasedStore
	RWMetadataStore
	FlaggableStore

	// Create creates a new layer, optionally giving it a specified ID rather than
	// a randomly-generated one, either inheriting data from another specified
	// layer or the empty base layer.  The new layer can optionally be given names
	// and have an SELinux label specified for use when mounting it.  Some
	// underlying drivers can accept a "size" option.  At this time, most
	// underlying drivers do not themselves distinguish between writeable
	// and read-only layers.
	Create(id string, parent *Layer, names []string, mountLabel string, options map[string]string, moreOptions *LayerOptions, writeable bool) (*Layer, error)

	// CreateWithFlags combines the functions of Create and SetFlag.
	CreateWithFlags(id string, parent *Layer, names []string, mountLabel string, options map[string]string, moreOptions *LayerOptions, writeable bool, flags map[string]interface{}) (layer *Layer, err error)

	// Put combines the functions of CreateWithFlags and ApplyDiff.
	Put(id string, parent *Layer, names []string, mountLabel string, options map[string]string, moreOptions *LayerOptions, writeable bool, flags map[string]interface{}, diff io.Reader) (*Layer, int64, error)

	// SetNames replaces the list of names associated with a layer with the
	// supplied values.
	SetNames(id string, names []string) error

	// Delete deletes a layer with the specified name or ID.
	Delete(id string) error

	// Wipe deletes all layers.
	Wipe() error

	// Mount mounts a layer for use.  If the specified layer is the parent of other
	// layers, it should not be written to.  An SELinux label to be applied to the
	// mount can be specified to override the one configured for the layer.
	// The mappings used by the container can be specified.
	Mount(id string, options drivers.MountOpts) (string, error)

	// Unmount unmounts a layer when it is no longer in use.
	Unmount(id string, force bool) (bool, error)

	// Mounted returns number of times the layer has been mounted.
	Mounted(id string) (int, error)

	// ParentOwners returns the UIDs and GIDs of parents of the layer's mountpoint
	// for which the layer's UID and GID maps don't contain corresponding entries.
	ParentOwners(id string) (uids, gids []int, err error)

	// ApplyDiff reads a tarstream which was created by a previous call to Diff and
	// applies its changes to a specified layer.
	ApplyDiff(to string, diff io.Reader) (int64, error)

	// LoadLocked wraps Load in a locked state. This means it loads the store
	// and cleans-up invalid layers if needed.
	LoadLocked() error
}

type layerStore struct {
	lockfile          Locker
	mountsLockfile    Locker
	rundir            string
	driver            drivers.Driver
	layerdir          string
	layers            []*Layer
	idindex           *truncindex.TruncIndex
	byid              map[string]*Layer
	byname            map[string]*Layer
	bymount           map[string]*Layer
	bycompressedsum   map[digest.Digest][]string
	byuncompressedsum map[digest.Digest][]string
	uidMap            []idtools.IDMap
	gidMap            []idtools.IDMap
}

func copyLayer(l *Layer) *Layer {
	return &Layer{
		ID:                 l.ID,
		Names:              copyStringSlice(l.Names),
		Parent:             l.Parent,
		Metadata:           l.Metadata,
		MountLabel:         l.MountLabel,
		MountPoint:         l.MountPoint,
		MountCount:         l.MountCount,
		Created:            l.Created,
		CompressedDigest:   l.CompressedDigest,
		CompressedSize:     l.CompressedSize,
		UncompressedDigest: l.UncompressedDigest,
		UncompressedSize:   l.UncompressedSize,
		CompressionType:    l.CompressionType,
		ReadOnly:           l.ReadOnly,
		Flags:              copyStringInterfaceMap(l.Flags),
		UIDMap:             copyIDMap(l.UIDMap),
		GIDMap:             copyIDMap(l.GIDMap),
		UIDs:               copyUint32Slice(l.UIDs),
		GIDs:               copyUint32Slice(l.GIDs),
	}
}

func (r *layerStore) Layers() ([]Layer, error) {
	layers := make([]Layer, len(r.layers))
	for i := range r.layers {
		layers[i] = *copyLayer(r.layers[i])
	}
	return layers, nil
}

func (r *layerStore) mountspath() string {
	return filepath.Join(r.rundir, "mountpoints.json")
}

func (r *layerStore) layerspath() string {
	return filepath.Join(r.layerdir, "layers.json")
}

func (r *layerStore) Load() error {
	shouldSave := false
	rpath := r.layerspath()
	data, err := ioutil.ReadFile(rpath)
	if err != nil && !os.IsNotExist(err) {
		return err
	}
	layers := []*Layer{}
	idlist := []string{}
	ids := make(map[string]*Layer)
	names := make(map[string]*Layer)
	compressedsums := make(map[digest.Digest][]string)
	uncompressedsums := make(map[digest.Digest][]string)
	if r.IsReadWrite() {
		label.ClearLabels()
	}
	if err = json.Unmarshal(data, &layers); len(data) == 0 || err == nil {
		idlist = make([]string, 0, len(layers))
		for n, layer := range layers {
			ids[layer.ID] = layers[n]
			idlist = append(idlist, layer.ID)
			for _, name := range layer.Names {
				if conflict, ok := names[name]; ok {
					r.removeName(conflict, name)
					shouldSave = true
				}
				names[name] = layers[n]
			}
			if layer.CompressedDigest != "" {
				compressedsums[layer.CompressedDigest] = append(compressedsums[layer.CompressedDigest], layer.ID)
			}
			if layer.UncompressedDigest != "" {
				uncompressedsums[layer.UncompressedDigest] = append(uncompressedsums[layer.UncompressedDigest], layer.ID)
			}
			if layer.MountLabel != "" {
				label.ReserveLabel(layer.MountLabel)
			}
			layer.ReadOnly = !r.IsReadWrite()
		}
		err = nil
	}
	if shouldSave && (!r.IsReadWrite() || !r.Locked()) {
		return ErrDuplicateLayerNames
	}
	r.layers = layers
	r.idindex = truncindex.NewTruncIndex(idlist)
	r.byid = ids
	r.byname = names
	r.bycompressedsum = compressedsums
	r.byuncompressedsum = uncompressedsums

	// Load and merge information about which layers are mounted, and where.
	if r.IsReadWrite() {
		r.mountsLockfile.RLock()
		defer r.mountsLockfile.Unlock()
		if err = r.loadMounts(); err != nil {
			return err
		}

		// Last step: as we’re writable, try to remove anything that a previous
		// user of this storage area marked for deletion but didn't manage to
		// actually delete.
		if r.Locked() {
			for _, layer := range r.layers {
				if layer.Flags == nil {
					layer.Flags = make(map[string]interface{})
				}
				if cleanup, ok := layer.Flags[incompleteFlag]; ok {
					if b, ok := cleanup.(bool); ok && b {
						err = r.deleteInternal(layer.ID)
						if err != nil {
							break
						}
						shouldSave = true
					}
				}
			}
		}
		if shouldSave {
			return r.saveLayers()
		}
	}

	return err
}

func (r *layerStore) LoadLocked() error {
	r.lockfile.Lock()
	defer r.lockfile.Unlock()
	return r.Load()
}

func (r *layerStore) loadMounts() error {
	mounts := make(map[string]*Layer)
	mpath := r.mountspath()
	data, err := ioutil.ReadFile(mpath)
	if err != nil && !os.IsNotExist(err) {
		return err
	}
	layerMounts := []layerMountPoint{}
	if err = json.Unmarshal(data, &layerMounts); len(data) == 0 || err == nil {
		// Clear all of our mount information.  If another process
		// unmounted something, it (along with its zero count) won't
		// have been encoded into the version of mountpoints.json that
		// we're loading, so our count could fall out of sync with it
		// if we don't, and if we subsequently change something else,
		// we'd pass that error along to other process that reloaded
		// the data after we saved it.
		for _, layer := range r.layers {
			layer.MountPoint = ""
			layer.MountCount = 0
		}
		// All of the non-zero count values will have been encoded, so
		// we reset the still-mounted ones based on the contents.
		for _, mount := range layerMounts {
			if mount.MountPoint != "" {
				if layer, ok := r.lookup(mount.ID); ok {
					mounts[mount.MountPoint] = layer
					layer.MountPoint = mount.MountPoint
					layer.MountCount = mount.MountCount
				}
			}
		}
		err = nil
	}
	r.bymount = mounts
	return err
}

func (r *layerStore) Save() error {
	r.mountsLockfile.Lock()
	defer r.mountsLockfile.Unlock()
	defer r.mountsLockfile.Touch()
	if err := r.saveLayers(); err != nil {
		return err
	}
	return r.saveMounts()
}

func (r *layerStore) saveLayers() error {
	if !r.IsReadWrite() {
		return errors.Wrapf(ErrStoreIsReadOnly, "not allowed to modify the layer store at %q", r.layerspath())
	}
	if !r.Locked() {
		return errors.New("layer store is not locked for writing")
	}
	rpath := r.layerspath()
	if err := os.MkdirAll(filepath.Dir(rpath), 0700); err != nil {
		return err
	}
	jldata, err := json.Marshal(&r.layers)
	if err != nil {
		return err
	}
	defer r.Touch()
	return ioutils.AtomicWriteFile(rpath, jldata, 0600)
}

func (r *layerStore) saveMounts() error {
	if !r.IsReadWrite() {
		return errors.Wrapf(ErrStoreIsReadOnly, "not allowed to modify the layer store at %q", r.layerspath())
	}
	if !r.mountsLockfile.Locked() {
		return errors.New("layer store mount information is not locked for writing")
	}
	mpath := r.mountspath()
	if err := os.MkdirAll(filepath.Dir(mpath), 0700); err != nil {
		return err
	}
	mounts := make([]layerMountPoint, 0, len(r.layers))
	for _, layer := range r.layers {
		if layer.MountPoint != "" && layer.MountCount > 0 {
			mounts = append(mounts, layerMountPoint{
				ID:         layer.ID,
				MountPoint: layer.MountPoint,
				MountCount: layer.MountCount,
			})
		}
	}
	jmdata, err := json.Marshal(&mounts)
	if err != nil {
		return err
	}
	if err = ioutils.AtomicWriteFile(mpath, jmdata, 0600); err != nil {
		return err
	}
	return r.loadMounts()
}

func (s *store) newLayerStore(rundir string, layerdir string, driver drivers.Driver) (LayerStore, error) {
	if err := os.MkdirAll(rundir, 0700); err != nil {
		return nil, err
	}
	if err := os.MkdirAll(layerdir, 0700); err != nil {
		return nil, err
	}
	lockfile, err := GetLockfile(filepath.Join(layerdir, "layers.lock"))
	if err != nil {
		return nil, err
	}
	mountsLockfile, err := GetLockfile(filepath.Join(rundir, "mountpoints.lock"))
	if err != nil {
		return nil, err
	}
	rlstore := layerStore{
		lockfile:       lockfile,
		mountsLockfile: mountsLockfile,
		driver:         driver,
		rundir:         rundir,
		layerdir:       layerdir,
		byid:           make(map[string]*Layer),
		bymount:        make(map[string]*Layer),
		byname:         make(map[string]*Layer),
		uidMap:         copyIDMap(s.uidMap),
		gidMap:         copyIDMap(s.gidMap),
	}
	if err := rlstore.Load(); err != nil {
		return nil, err
	}
	return &rlstore, nil
}

func newROLayerStore(rundir string, layerdir string, driver drivers.Driver) (ROLayerStore, error) {
	lockfile, err := GetROLockfile(filepath.Join(layerdir, "layers.lock"))
	if err != nil {
		return nil, err
	}
	rlstore := layerStore{
		lockfile:       lockfile,
		mountsLockfile: nil,
		driver:         driver,
		rundir:         rundir,
		layerdir:       layerdir,
		byid:           make(map[string]*Layer),
		bymount:        make(map[string]*Layer),
		byname:         make(map[string]*Layer),
	}
	if err := rlstore.Load(); err != nil {
		return nil, err
	}
	return &rlstore, nil
}

func (r *layerStore) lookup(id string) (*Layer, bool) {
	if layer, ok := r.byid[id]; ok {
		return layer, ok
	} else if layer, ok := r.byname[id]; ok {
		return layer, ok
	} else if longid, err := r.idindex.Get(id); err == nil {
		layer, ok := r.byid[longid]
		return layer, ok
	}
	return nil, false
}

func (r *layerStore) Size(name string) (int64, error) {
	layer, ok := r.lookup(name)
	if !ok {
		return -1, ErrLayerUnknown
	}
	// We use the presence of a non-empty digest as an indicator that the size value was intentionally set, and that
	// a zero value is not just present because it was never set to anything else (which can happen if the layer was
	// created by a version of this library that didn't keep track of digest and size information).
	if layer.UncompressedDigest != "" {
		return layer.UncompressedSize, nil
	}
	return -1, nil
}

func (r *layerStore) ClearFlag(id string, flag string) error {
	if !r.IsReadWrite() {
		return errors.Wrapf(ErrStoreIsReadOnly, "not allowed to clear flags on layers at %q", r.layerspath())
	}
	layer, ok := r.lookup(id)
	if !ok {
		return ErrLayerUnknown
	}
	delete(layer.Flags, flag)
	return r.Save()
}

func (r *layerStore) SetFlag(id string, flag string, value interface{}) error {
	if !r.IsReadWrite() {
		return errors.Wrapf(ErrStoreIsReadOnly, "not allowed to set flags on layers at %q", r.layerspath())
	}
	layer, ok := r.lookup(id)
	if !ok {
		return ErrLayerUnknown
	}
	if layer.Flags == nil {
		layer.Flags = make(map[string]interface{})
	}
	layer.Flags[flag] = value
	return r.Save()
}

func (r *layerStore) Status() ([][2]string, error) {
	return r.driver.Status(), nil
}

func (r *layerStore) Put(id string, parentLayer *Layer, names []string, mountLabel string, options map[string]string, moreOptions *LayerOptions, writeable bool, flags map[string]interface{}, diff io.Reader) (layer *Layer, size int64, err error) {
	if !r.IsReadWrite() {
		return nil, -1, errors.Wrapf(ErrStoreIsReadOnly, "not allowed to create new layers at %q", r.layerspath())
	}
	size = -1
	if err := os.MkdirAll(r.rundir, 0700); err != nil {
		return nil, -1, err
	}
	if err := os.MkdirAll(r.layerdir, 0700); err != nil {
		return nil, -1, err
	}
	if id == "" {
		id = stringid.GenerateRandomID()
		_, idInUse := r.byid[id]
		for idInUse {
			id = stringid.GenerateRandomID()
			_, idInUse = r.byid[id]
		}
	}
	if duplicateLayer, idInUse := r.byid[id]; idInUse {
		return duplicateLayer, -1, ErrDuplicateID
	}
	names = dedupeNames(names)
	for _, name := range names {
		if _, nameInUse := r.byname[name]; nameInUse {
			return nil, -1, ErrDuplicateName
		}
	}
	parent := ""
	if parentLayer != nil {
		parent = parentLayer.ID
	}
	var parentMappings, templateIDMappings, oldMappings *idtools.IDMappings
	if moreOptions.TemplateLayer != "" {
		templateLayer, ok := r.lookup(moreOptions.TemplateLayer)
		if !ok {
			return nil, -1, ErrLayerUnknown
		}
		templateIDMappings = idtools.NewIDMappingsFromMaps(templateLayer.UIDMap, templateLayer.GIDMap)
	} else {
		templateIDMappings = &idtools.IDMappings{}
	}
	if parentLayer != nil {
		parentMappings = idtools.NewIDMappingsFromMaps(parentLayer.UIDMap, parentLayer.GIDMap)
	} else {
		parentMappings = &idtools.IDMappings{}
	}
	if mountLabel != "" {
		label.ReserveLabel(mountLabel)
	}
	idMappings := idtools.NewIDMappingsFromMaps(moreOptions.UIDMap, moreOptions.GIDMap)
	opts := drivers.CreateOpts{
		MountLabel: mountLabel,
		StorageOpt: options,
		IDMappings: idMappings,
	}
	if moreOptions.TemplateLayer != "" {
		if err = r.driver.CreateFromTemplate(id, moreOptions.TemplateLayer, templateIDMappings, parent, parentMappings, &opts, writeable); err != nil {
			if id != "" {
				return nil, -1, errors.Wrapf(err, "error creating copy of template layer %q with ID %q", moreOptions.TemplateLayer, id)
			}
			return nil, -1, errors.Wrapf(err, "error creating copy of template layer %q", moreOptions.TemplateLayer)
		}
		oldMappings = templateIDMappings
	} else {
		if writeable {
			if err = r.driver.CreateReadWrite(id, parent, &opts); err != nil {
				if id != "" {
					return nil, -1, errors.Wrapf(err, "error creating read-write layer with ID %q", id)
				}
				return nil, -1, errors.Wrapf(err, "error creating read-write layer")
			}
		} else {
			if err = r.driver.Create(id, parent, &opts); err != nil {
				if id != "" {
					return nil, -1, errors.Wrapf(err, "error creating layer with ID %q", id)
				}
				return nil, -1, errors.Wrapf(err, "error creating layer")
			}
		}
		oldMappings = parentMappings
	}
	if !reflect.DeepEqual(oldMappings.UIDs(), idMappings.UIDs()) || !reflect.DeepEqual(oldMappings.GIDs(), idMappings.GIDs()) {
		if err = r.driver.UpdateLayerIDMap(id, oldMappings, idMappings, mountLabel); err != nil {
			// We don't have a record of this layer, but at least
			// try to clean it up underneath us.
			r.driver.Remove(id)
			return nil, -1, err
		}
	}
	if err == nil {
		layer = &Layer{
			ID:         id,
			Parent:     parent,
			Names:      names,
			MountLabel: mountLabel,
			Created:    time.Now().UTC(),
			Flags:      make(map[string]interface{}),
			UIDMap:     copyIDMap(moreOptions.UIDMap),
			GIDMap:     copyIDMap(moreOptions.GIDMap),
		}
		r.layers = append(r.layers, layer)
		r.idindex.Add(id)
		r.byid[id] = layer
		for _, name := range names {
			r.byname[name] = layer
		}
		for flag, value := range flags {
			layer.Flags[flag] = value
		}
		if diff != nil {
			layer.Flags[incompleteFlag] = true
			err = r.Save()
			if err != nil {
				// We don't have a record of this layer, but at least
				// try to clean it up underneath us.
				r.driver.Remove(id)
				return nil, -1, err
			}
			size, err = r.ApplyDiff(layer.ID, diff)
			if err != nil {
				if r.Delete(layer.ID) != nil {
					// Either a driver error or an error saving.
					// We now have a layer that's been marked for
					// deletion but which we failed to remove.
				}
				return nil, -1, err
			}
			delete(layer.Flags, incompleteFlag)
		}
		err = r.Save()
		if err != nil {
			// We don't have a record of this layer, but at least
			// try to clean it up underneath us.
			r.driver.Remove(id)
			return nil, -1, err
		}
		layer = copyLayer(layer)
	}
	return layer, size, err
}

func (r *layerStore) CreateWithFlags(id string, parent *Layer, names []string, mountLabel string, options map[string]string, moreOptions *LayerOptions, writeable bool, flags map[string]interface{}) (layer *Layer, err error) {
	layer, _, err = r.Put(id, parent, names, mountLabel, options, moreOptions, writeable, flags, nil)
	return layer, err
}

func (r *layerStore) Create(id string, parent *Layer, names []string, mountLabel string, options map[string]string, moreOptions *LayerOptions, writeable bool) (layer *Layer, err error) {
	return r.CreateWithFlags(id, parent, names, mountLabel, options, moreOptions, writeable, nil)
}

func (r *layerStore) Mounted(id string) (int, error) {
	if !r.IsReadWrite() {
		return 0, errors.Wrapf(ErrStoreIsReadOnly, "no mount information for layers at %q", r.mountspath())
	}
	r.mountsLockfile.RLock()
	defer r.mountsLockfile.Unlock()
	if modified, err := r.mountsLockfile.Modified(); modified || err != nil {
		if err = r.loadMounts(); err != nil {
			return 0, err
		}
	}
	layer, ok := r.lookup(id)
	if !ok {
		return 0, ErrLayerUnknown
	}
	return layer.MountCount, nil
}

func (r *layerStore) Mount(id string, options drivers.MountOpts) (string, error) {
	if !r.IsReadWrite() {
		return "", errors.Wrapf(ErrStoreIsReadOnly, "not allowed to update mount locations for layers at %q", r.mountspath())
	}
	r.mountsLockfile.Lock()
	defer r.mountsLockfile.Unlock()
	if modified, err := r.mountsLockfile.Modified(); modified || err != nil {
		if err = r.loadMounts(); err != nil {
			return "", err
		}
	}
	defer r.mountsLockfile.Touch()
	layer, ok := r.lookup(id)
	if !ok {
		return "", ErrLayerUnknown
	}
	if layer.MountCount > 0 {
		mounted, err := mount.Mounted(layer.MountPoint)
		if err != nil {
			return "", err
		}
		// If the container is not mounted then we have a condition
		// where the kernel umounted the mount point. This means
		// that the mount count never got decremented.
		if mounted {
			layer.MountCount++
			return layer.MountPoint, r.saveMounts()
		}
	}
	if options.MountLabel == "" {
		options.MountLabel = layer.MountLabel
	}

	if (options.UidMaps != nil || options.GidMaps != nil) && !r.driver.SupportsShifting() {
		if !reflect.DeepEqual(options.UidMaps, layer.UIDMap) || !reflect.DeepEqual(options.GidMaps, layer.GIDMap) {
			return "", fmt.Errorf("cannot mount layer %v: shifting not enabled", layer.ID)
		}
	}
	mountpoint, err := r.driver.Get(id, options)
	if mountpoint != "" && err == nil {
		if layer.MountPoint != "" {
			delete(r.bymount, layer.MountPoint)
		}
		layer.MountPoint = filepath.Clean(mountpoint)
		layer.MountCount++
		r.bymount[layer.MountPoint] = layer
		err = r.saveMounts()
	}
	return mountpoint, err
}

func (r *layerStore) Unmount(id string, force bool) (bool, error) {
	if !r.IsReadWrite() {
		return false, errors.Wrapf(ErrStoreIsReadOnly, "not allowed to update mount locations for layers at %q", r.mountspath())
	}
	r.mountsLockfile.Lock()
	defer r.mountsLockfile.Unlock()
	if modified, err := r.mountsLockfile.Modified(); modified || err != nil {
		if err = r.loadMounts(); err != nil {
			return false, err
		}
	}
	defer r.mountsLockfile.Touch()
	layer, ok := r.lookup(id)
	if !ok {
		layerByMount, ok := r.bymount[filepath.Clean(id)]
		if !ok {
			return false, ErrLayerUnknown
		}
		layer = layerByMount
	}
	if force {
		layer.MountCount = 1
	}
	if layer.MountCount > 1 {
		layer.MountCount--
		return true, r.saveMounts()
	}
	err := r.driver.Put(id)
	if err == nil || os.IsNotExist(err) {
		if layer.MountPoint != "" {
			delete(r.bymount, layer.MountPoint)
		}
		layer.MountCount--
		layer.MountPoint = ""
		return false, r.saveMounts()
	}
	return true, err
}

func (r *layerStore) ParentOwners(id string) (uids, gids []int, err error) {
	if !r.IsReadWrite() {
		return nil, nil, errors.Wrapf(ErrStoreIsReadOnly, "no mount information for layers at %q", r.mountspath())
	}
	r.mountsLockfile.RLock()
	defer r.mountsLockfile.Unlock()
	if modified, err := r.mountsLockfile.Modified(); modified || err != nil {
		if err = r.loadMounts(); err != nil {
			return nil, nil, err
		}
	}
	layer, ok := r.lookup(id)
	if !ok {
		return nil, nil, ErrLayerUnknown
	}
	if len(layer.UIDMap) == 0 && len(layer.GIDMap) == 0 {
		// We're not using any mappings, so there aren't any unmapped IDs on parent directories.
		return nil, nil, nil
	}
	if layer.MountPoint == "" {
		// We don't know which directories to examine.
		return nil, nil, ErrLayerNotMounted
	}
	rootuid, rootgid, err := idtools.GetRootUIDGID(layer.UIDMap, layer.GIDMap)
	if err != nil {
		return nil, nil, errors.Wrapf(err, "error reading root ID values for layer %q", layer.ID)
	}
	m := idtools.NewIDMappingsFromMaps(layer.UIDMap, layer.GIDMap)
	fsuids := make(map[int]struct{})
	fsgids := make(map[int]struct{})
	for dir := filepath.Dir(layer.MountPoint); dir != "" && dir != string(os.PathSeparator); dir = filepath.Dir(dir) {
		st, err := system.Stat(dir)
		if err != nil {
			return nil, nil, errors.Wrapf(err, "error reading ownership of directory %q", dir)
		}
		lst, err := system.Lstat(dir)
		if err != nil {
			return nil, nil, errors.Wrapf(err, "error reading ownership of directory-in-case-it's-a-symlink %q", dir)
		}
		fsuid := int(st.UID())
		fsgid := int(st.GID())
		if _, _, err := m.ToContainer(idtools.IDPair{UID: fsuid, GID: rootgid}); err != nil {
			fsuids[fsuid] = struct{}{}
		}
		if _, _, err := m.ToContainer(idtools.IDPair{UID: rootuid, GID: fsgid}); err != nil {
			fsgids[fsgid] = struct{}{}
		}
		fsuid = int(lst.UID())
		fsgid = int(lst.GID())
		if _, _, err := m.ToContainer(idtools.IDPair{UID: fsuid, GID: rootgid}); err != nil {
			fsuids[fsuid] = struct{}{}
		}
		if _, _, err := m.ToContainer(idtools.IDPair{UID: rootuid, GID: fsgid}); err != nil {
			fsgids[fsgid] = struct{}{}
		}
	}
	for uid := range fsuids {
		uids = append(uids, uid)
	}
	for gid := range fsgids {
		gids = append(gids, gid)
	}
	if len(uids) > 1 {
		sort.Ints(uids)
	}
	if len(gids) > 1 {
		sort.Ints(gids)
	}
	return uids, gids, nil
}

func (r *layerStore) removeName(layer *Layer, name string) {
	layer.Names = stringSliceWithoutValue(layer.Names, name)
}

func (r *layerStore) SetNames(id string, names []string) error {
	if !r.IsReadWrite() {
		return errors.Wrapf(ErrStoreIsReadOnly, "not allowed to change layer name assignments at %q", r.layerspath())
	}
	names = dedupeNames(names)
	if layer, ok := r.lookup(id); ok {
		for _, name := range layer.Names {
			delete(r.byname, name)
		}
		for _, name := range names {
			if otherLayer, ok := r.byname[name]; ok {
				r.removeName(otherLayer, name)
			}
			r.byname[name] = layer
		}
		layer.Names = names
		return r.Save()
	}
	return ErrLayerUnknown
}

func (r *layerStore) Metadata(id string) (string, error) {
	if layer, ok := r.lookup(id); ok {
		return layer.Metadata, nil
	}
	return "", ErrLayerUnknown
}

func (r *layerStore) SetMetadata(id, metadata string) error {
	if !r.IsReadWrite() {
		return errors.Wrapf(ErrStoreIsReadOnly, "not allowed to modify layer metadata at %q", r.layerspath())
	}
	if layer, ok := r.lookup(id); ok {
		layer.Metadata = metadata
		return r.Save()
	}
	return ErrLayerUnknown
}

func (r *layerStore) tspath(id string) string {
	return filepath.Join(r.layerdir, id+tarSplitSuffix)
}

func (r *layerStore) deleteInternal(id string) error {
	if !r.IsReadWrite() {
		return errors.Wrapf(ErrStoreIsReadOnly, "not allowed to delete layers at %q", r.layerspath())
	}
	layer, ok := r.lookup(id)
	if !ok {
		return ErrLayerUnknown
	}
	id = layer.ID
	err := r.driver.Remove(id)
	if err == nil {
		os.Remove(r.tspath(id))
		delete(r.byid, id)
		for _, name := range layer.Names {
			delete(r.byname, name)
		}
		r.idindex.Delete(id)
		mountLabel := layer.MountLabel
		if layer.MountPoint != "" {
			delete(r.bymount, layer.MountPoint)
		}
		toDeleteIndex := -1
		for i, candidate := range r.layers {
			if candidate.ID == id {
				toDeleteIndex = i
				break
			}
		}
		if toDeleteIndex != -1 {
			// delete the layer at toDeleteIndex
			if toDeleteIndex == len(r.layers)-1 {
				r.layers = r.layers[:len(r.layers)-1]
			} else {
				r.layers = append(r.layers[:toDeleteIndex], r.layers[toDeleteIndex+1:]...)
			}
		}
		if mountLabel != "" {
			var found bool
			for _, candidate := range r.layers {
				if candidate.MountLabel == mountLabel {
					found = true
					break
				}
			}
			if !found {
				label.ReleaseLabel(mountLabel)
			}
		}
	}
	return err
}

func (r *layerStore) Delete(id string) error {
	layer, ok := r.lookup(id)
	if !ok {
		return ErrLayerUnknown
	}
	id = layer.ID
	// The layer may already have been explicitly unmounted, but if not, we
	// should try to clean that up before we start deleting anything at the
	// driver level.
	mountCount, err := r.Mounted(id)
	if err != nil {
		return errors.Wrapf(err, "error checking if layer %q is still mounted", id)
	}
	for mountCount > 0 {
		if _, err := r.Unmount(id, false); err != nil {
			return err
		}
		mountCount, err = r.Mounted(id)
		if err != nil {
			return errors.Wrapf(err, "error checking if layer %q is still mounted", id)
		}
	}
	if err := r.deleteInternal(id); err != nil {
		return err
	}
	return r.Save()
}

func (r *layerStore) Lookup(name string) (id string, err error) {
	if layer, ok := r.lookup(name); ok {
		return layer.ID, nil
	}
	return "", ErrLayerUnknown
}

func (r *layerStore) Exists(id string) bool {
	_, ok := r.lookup(id)
	return ok
}

func (r *layerStore) Get(id string) (*Layer, error) {
	if layer, ok := r.lookup(id); ok {
		return copyLayer(layer), nil
	}
	return nil, ErrLayerUnknown
}

func (r *layerStore) Wipe() error {
	if !r.IsReadWrite() {
		return errors.Wrapf(ErrStoreIsReadOnly, "not allowed to delete layers at %q", r.layerspath())
	}
	ids := make([]string, 0, len(r.byid))
	for id := range r.byid {
		ids = append(ids, id)
	}
	for _, id := range ids {
		if err := r.Delete(id); err != nil {
			return err
		}
	}
	return nil
}

func (r *layerStore) findParentAndLayer(from, to string) (fromID string, toID string, fromLayer, toLayer *Layer, err error) {
	var ok bool
	toLayer, ok = r.lookup(to)
	if !ok {
		return "", "", nil, nil, ErrLayerUnknown
	}
	to = toLayer.ID
	if from == "" {
		from = toLayer.Parent
	}
	if from != "" {
		fromLayer, ok = r.lookup(from)
		if ok {
			from = fromLayer.ID
		} else {
			fromLayer, ok = r.lookup(toLayer.Parent)
			if ok {
				from = fromLayer.ID
			}
		}
	}
	return from, to, fromLayer, toLayer, nil
}

func (r *layerStore) layerMappings(layer *Layer) *idtools.IDMappings {
	if layer == nil {
		return &idtools.IDMappings{}
	}
	return idtools.NewIDMappingsFromMaps(layer.UIDMap, layer.GIDMap)
}

func (r *layerStore) Changes(from, to string) ([]archive.Change, error) {
	from, to, fromLayer, toLayer, err := r.findParentAndLayer(from, to)
	if err != nil {
		return nil, ErrLayerUnknown
	}
	return r.driver.Changes(to, r.layerMappings(toLayer), from, r.layerMappings(fromLayer), toLayer.MountLabel)
}

type simpleGetCloser struct {
	r    *layerStore
	path string
	id   string
}

func (s *simpleGetCloser) Get(path string) (io.ReadCloser, error) {
	return os.Open(filepath.Join(s.path, path))
}

func (s *simpleGetCloser) Close() error {
	_, err := s.r.Unmount(s.id, false)
	return err
}

func (r *layerStore) newFileGetter(id string) (drivers.FileGetCloser, error) {
	if getter, ok := r.driver.(drivers.DiffGetterDriver); ok {
		return getter.DiffGetter(id)
	}
	path, err := r.Mount(id, drivers.MountOpts{})
	if err != nil {
		return nil, err
	}
	return &simpleGetCloser{
		r:    r,
		path: path,
		id:   id,
	}, nil
}

func (r *layerStore) Diff(from, to string, options *DiffOptions) (io.ReadCloser, error) {
	var metadata storage.Unpacker

	from, to, fromLayer, toLayer, err := r.findParentAndLayer(from, to)
	if err != nil {
		return nil, ErrLayerUnknown
	}
	// Default to applying the type of compression that we noted was used
	// for the layerdiff when it was applied.
	compression := toLayer.CompressionType
	// If a particular compression type (or no compression) was selected,
	// use that instead.
	if options != nil && options.Compression != nil {
		compression = *options.Compression
	}
	maybeCompressReadCloser := func(rc io.ReadCloser) (io.ReadCloser, error) {
		// Depending on whether or not compression is desired, return either the
		// passed-in ReadCloser, or a new one that provides its readers with a
		// compressed version of the data that the original would have provided
		// to its readers.
		if compression == archive.Uncompressed {
			return rc, nil
		}
		preader, pwriter := io.Pipe()
		compressor, err := archive.CompressStream(pwriter, compression)
		if err != nil {
			rc.Close()
			pwriter.Close()
			preader.Close()
			return nil, err
		}
		go func() {
			defer pwriter.Close()
			defer compressor.Close()
			defer rc.Close()
			io.Copy(compressor, rc)
		}()
		return preader, nil
	}

	if from != toLayer.Parent {
		diff, err := r.driver.Diff(to, r.layerMappings(toLayer), from, r.layerMappings(fromLayer), toLayer.MountLabel)
		if err != nil {
			return nil, err
		}
		return maybeCompressReadCloser(diff)
	}

	tsfile, err := os.Open(r.tspath(to))
	if err != nil {
		if !os.IsNotExist(err) {
			return nil, err
		}
		diff, err := r.driver.Diff(to, r.layerMappings(toLayer), from, r.layerMappings(fromLayer), toLayer.MountLabel)
		if err != nil {
			return nil, err
		}
		return maybeCompressReadCloser(diff)
	}
	defer tsfile.Close()

	decompressor, err := pgzip.NewReader(tsfile)
	if err != nil {
		return nil, err
	}
	defer decompressor.Close()

	tsbytes, err := ioutil.ReadAll(decompressor)
	if err != nil {
		return nil, err
	}

	metadata = storage.NewJSONUnpacker(bytes.NewBuffer(tsbytes))

	fgetter, err := r.newFileGetter(to)
	if err != nil {
		return nil, err
	}

	tarstream := asm.NewOutputTarStream(fgetter, metadata)
	rc := ioutils.NewReadCloserWrapper(tarstream, func() error {
		err1 := tarstream.Close()
		err2 := fgetter.Close()
		if err2 == nil {
			return err1
		}
		return err2
	})
	return maybeCompressReadCloser(rc)
}

func (r *layerStore) DiffSize(from, to string) (size int64, err error) {
	var fromLayer, toLayer *Layer
	from, to, fromLayer, toLayer, err = r.findParentAndLayer(from, to)
	if err != nil {
		return -1, ErrLayerUnknown
	}
	return r.driver.DiffSize(to, r.layerMappings(toLayer), from, r.layerMappings(fromLayer), toLayer.MountLabel)
}

func (r *layerStore) ApplyDiff(to string, diff io.Reader) (size int64, err error) {
	if !r.IsReadWrite() {
		return -1, errors.Wrapf(ErrStoreIsReadOnly, "not allowed to modify layer contents at %q", r.layerspath())
	}

	layer, ok := r.lookup(to)
	if !ok {
		return -1, ErrLayerUnknown
	}

	header := make([]byte, 10240)
	n, err := diff.Read(header)
	if err != nil && err != io.EOF {
		return -1, err
	}

	compression := archive.DetectCompression(header[:n])
	compressedDigest := digest.Canonical.Digester()
	compressedCounter := ioutils.NewWriteCounter(compressedDigest.Hash())
	defragmented := io.TeeReader(io.MultiReader(bytes.NewBuffer(header[:n]), diff), compressedCounter)

	tsdata := bytes.Buffer{}
	compressor, err := pgzip.NewWriterLevel(&tsdata, pgzip.BestSpeed)
	if err != nil {
		compressor = pgzip.NewWriter(&tsdata)
	}
	metadata := storage.NewJSONPacker(compressor)
	uncompressed, err := archive.DecompressStream(defragmented)
	if err != nil {
		return -1, err
	}
	uncompressedDigest := digest.Canonical.Digester()
	uncompressedCounter := ioutils.NewWriteCounter(uncompressedDigest.Hash())
	uidLog := make(map[uint32]struct{})
	gidLog := make(map[uint32]struct{})
	idLogger, err := tarlog.NewLogger(func(h *tar.Header) {
		if !strings.HasPrefix(path.Base(h.Name), archive.WhiteoutPrefix) {
			uidLog[uint32(h.Uid)] = struct{}{}
			gidLog[uint32(h.Gid)] = struct{}{}
		}
	})
	if err != nil {
		return -1, err
	}
	payload, err := asm.NewInputTarStream(io.TeeReader(uncompressed, io.MultiWriter(uncompressedCounter, idLogger)), metadata, storage.NewDiscardFilePutter())
	if err != nil {
		return -1, err
	}
	options := drivers.ApplyDiffOpts{
		Diff:       payload,
		Mappings:   r.layerMappings(layer),
		MountLabel: layer.MountLabel,
	}
	size, err = r.driver.ApplyDiff(layer.ID, layer.Parent, options)
	if err != nil {
		return -1, err
	}
	compressor.Close()
	idLogger.Close()
	if err == nil {
		if err := os.MkdirAll(filepath.Dir(r.tspath(layer.ID)), 0700); err != nil {
			return -1, err
		}
		if err := ioutils.AtomicWriteFile(r.tspath(layer.ID), tsdata.Bytes(), 0600); err != nil {
			return -1, err
		}
	}

	updateDigestMap := func(m *map[digest.Digest][]string, oldvalue, newvalue digest.Digest, id string) {
		var newList []string
		if oldvalue != "" {
			for _, value := range (*m)[oldvalue] {
				if value != id {
					newList = append(newList, value)
				}
			}
			if len(newList) > 0 {
				(*m)[oldvalue] = newList
			} else {
				delete(*m, oldvalue)
			}
		}
		if newvalue != "" {
			(*m)[newvalue] = append((*m)[newvalue], id)
		}
	}
	updateDigestMap(&r.bycompressedsum, layer.CompressedDigest, compressedDigest.Digest(), layer.ID)
	layer.CompressedDigest = compressedDigest.Digest()
	layer.CompressedSize = compressedCounter.Count
	updateDigestMap(&r.byuncompressedsum, layer.UncompressedDigest, uncompressedDigest.Digest(), layer.ID)
	layer.UncompressedDigest = uncompressedDigest.Digest()
	layer.UncompressedSize = uncompressedCounter.Count
	layer.CompressionType = compression
	layer.UIDs = make([]uint32, 0, len(uidLog))
	for uid := range uidLog {
		layer.UIDs = append(layer.UIDs, uid)
	}
	sort.Slice(layer.UIDs, func(i, j int) bool {
		return layer.UIDs[i] < layer.UIDs[j]
	})
	layer.GIDs = make([]uint32, 0, len(gidLog))
	for gid := range gidLog {
		layer.GIDs = append(layer.GIDs, gid)
	}
	sort.Slice(layer.GIDs, func(i, j int) bool {
		return layer.GIDs[i] < layer.GIDs[j]
	})

	err = r.Save()

	return size, err
}

func (r *layerStore) layersByDigestMap(m map[digest.Digest][]string, d digest.Digest) ([]Layer, error) {
	var layers []Layer
	for _, layerID := range m[d] {
		layer, ok := r.lookup(layerID)
		if !ok {
			return nil, ErrLayerUnknown
		}
		layers = append(layers, *copyLayer(layer))
	}
	return layers, nil
}

func (r *layerStore) LayersByCompressedDigest(d digest.Digest) ([]Layer, error) {
	return r.layersByDigestMap(r.bycompressedsum, d)
}

func (r *layerStore) LayersByUncompressedDigest(d digest.Digest) ([]Layer, error) {
	return r.layersByDigestMap(r.byuncompressedsum, d)
}

func (r *layerStore) Lock() {
	r.lockfile.Lock()
}

func (r *layerStore) RecursiveLock() {
	r.lockfile.RecursiveLock()
}

func (r *layerStore) RLock() {
	r.lockfile.RLock()
}

func (r *layerStore) Unlock() {
	r.lockfile.Unlock()
}

func (r *layerStore) Touch() error {
	return r.lockfile.Touch()
}

func (r *layerStore) Modified() (bool, error) {
	var mmodified bool
	lmodified, err := r.lockfile.Modified()
	if err != nil {
		return lmodified, err
	}
	if r.IsReadWrite() {
		r.mountsLockfile.RLock()
		defer r.mountsLockfile.Unlock()
		mmodified, err = r.mountsLockfile.Modified()
		if err != nil {
			return lmodified, err
		}
	}
	return lmodified || mmodified, nil
}

func (r *layerStore) IsReadWrite() bool {
	return r.lockfile.IsReadWrite()
}

func (r *layerStore) TouchedSince(when time.Time) bool {
	return r.lockfile.TouchedSince(when)
}

func (r *layerStore) Locked() bool {
	return r.lockfile.Locked()
}
